library(forecast)
library(seasonal)
##
## Attaching package: 'seasonal'
## The following object is masked from 'package:tibble':
##
## view
library(fpp2)
## Loading required package: fma
##
## Attaching package: 'fma'
## The following object is masked from 'package:plyr':
##
## ozone
## Loading required package: expsmooth
library(broom)
library(tidyverse)
library(sweep)
library(tseries)
detach("package:dplyr", unload=TRUE)
## Warning: 'dplyr' namespace cannot be unloaded:
## namespace 'dplyr' is imported by 'timetk', 'tigris', 'broom', 'tidycensus', 'sweep' so cannot be unloaded
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:xts':
##
## first, last
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:plyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(scales)
##
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
library(GGally)
##
## Attaching package: 'GGally'
## The following object is masked from 'package:dplyr':
##
## nasa
## The following object is masked from 'package:fma':
##
## pigs
library(e1071)
library(fpp2)
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
theme_set(theme_minimal())
Additive decomposition model
\(y_t = S_t + T_t + R_t\)
Multiplicative decompisition model
$y_t = S_t T_t R_t $
\(y_t\) is the data, \(S_t\) is the seasonal component, \(T_t\) is the trend-cycle component, the \(R_t\) is the remainder component.
ATTENTION: The additive decomposition is the most appropriate if the magnitude of the seaosnal fluctuations or the variation around the trend-cycle does not vary of the times series.
ATTENTION: if the variation due to seasonality is not of primary interest, the seasonally adjusted series can be useful.
\(\hat{T_t} = \frac{1}{m}\sum_{j = -k}^{k}y_{t+j}\)
where \(m = 2k+1\)
autoplot(elecsales) +
labs(x = "Year", y = "Gwh",
title = "Annual Electricity Sales: South Australia")
x <- data.frame(time = index(elecsales),
data = as.numeric(elecsales))
x <- x %>%
mutate(data_5ma = rollmean(data, k = 5, fill = NA))
knitr::kable(x, format = "html")
| time | data | data_5ma |
|---|---|---|
| 1989 | 2354.34 | NA |
| 1990 | 2379.71 | NA |
| 1991 | 2318.52 | 2381.530 |
| 1992 | 2468.99 | 2424.556 |
| 1993 | 2386.09 | 2463.758 |
| 1994 | 2569.47 | 2552.598 |
| 1995 | 2575.72 | 2627.700 |
| 1996 | 2762.72 | 2750.622 |
| 1997 | 2844.50 | 2858.348 |
| 1998 | 3000.70 | 3014.704 |
| 1999 | 3108.10 | 3077.300 |
| 2000 | 3357.50 | 3144.520 |
| 2001 | 3075.70 | 3188.700 |
| 2002 | 3180.60 | 3202.320 |
| 2003 | 3221.60 | 3216.940 |
| 2004 | 3176.20 | 3307.296 |
| 2005 | 3430.60 | 3398.754 |
| 2006 | 3527.48 | 3485.434 |
| 2007 | 3637.89 | NA |
| 2008 | 3655.00 | NA |
There are no values for either the first two ears of the last two year, because we do not have two observations on either side.
autoplot(elecsales, series = "Data") +
autolayer(ma(elecsales, 5), series = "5-MA") +
labs(x = "year", y = "GWH",
title = "Annual electricity series: South Australia")
## Warning: Removed 4 rows containing missing values (geom_path).
beer2 <- window(ausbeer, start = 1992)
beer2_data <- data.frame(date = index(beer2),
observation = as.numeric(beer2))
beer2_data %>%
mutate(ma4 = ma(observation, order = 4, centre = F),
ma2_4 = ma(observation, order = 4, centre = T)) %>%
knitr::kable()
| date | observation | ma4 | ma2_4 |
|---|---|---|---|
| 1992.00 | 443 | NA | NA |
| 1992.25 | 410 | 451.25 | NA |
| 1992.50 | 420 | 448.75 | 450.000 |
| 1992.75 | 532 | 451.50 | 450.125 |
| 1993.00 | 433 | 449.00 | 450.250 |
| 1993.25 | 421 | 444.00 | 446.500 |
| 1993.50 | 410 | 448.00 | 446.000 |
| 1993.75 | 512 | 438.00 | 443.000 |
| 1994.00 | 449 | 441.25 | 439.625 |
| 1994.25 | 381 | 446.00 | 443.625 |
| 1994.50 | 423 | 440.25 | 443.125 |
| 1994.75 | 531 | 447.00 | 443.625 |
| 1995.00 | 426 | 445.25 | 446.125 |
| 1995.25 | 408 | 442.50 | 443.875 |
| 1995.50 | 416 | 438.25 | 440.375 |
| 1995.75 | 520 | 435.75 | 437.000 |
| 1996.00 | 409 | 431.25 | 433.500 |
| 1996.25 | 398 | 428.00 | 429.625 |
| 1996.50 | 398 | 433.75 | 430.875 |
| 1996.75 | 507 | 433.75 | 433.750 |
| 1997.00 | 432 | 435.75 | 434.750 |
| 1997.25 | 398 | 440.50 | 438.125 |
| 1997.50 | 406 | 439.50 | 440.000 |
| 1997.75 | 526 | 439.25 | 439.375 |
| 1998.00 | 428 | 438.50 | 438.875 |
| 1998.25 | 397 | 436.25 | 437.375 |
| 1998.50 | 403 | 438.00 | 437.125 |
| 1998.75 | 517 | 434.50 | 436.250 |
| 1999.00 | 435 | 439.75 | 437.125 |
| 1999.25 | 383 | 440.75 | 440.250 |
| 1999.50 | 424 | 437.25 | 439.000 |
| 1999.75 | 521 | 442.00 | 439.625 |
| 2000.00 | 421 | 439.50 | 440.750 |
| 2000.25 | 402 | 434.25 | 436.875 |
| 2000.50 | 414 | 441.75 | 438.000 |
| 2000.75 | 500 | 436.25 | 439.000 |
| 2001.00 | 451 | 436.75 | 436.500 |
| 2001.25 | 380 | 434.75 | 435.750 |
| 2001.50 | 416 | 429.00 | 431.875 |
| 2001.75 | 492 | 436.00 | 432.500 |
| 2002.00 | 428 | 433.50 | 434.750 |
| 2002.25 | 408 | 437.00 | 435.250 |
| 2002.50 | 406 | 438.75 | 437.875 |
| 2002.75 | 506 | 431.75 | 435.250 |
| 2003.00 | 435 | 435.50 | 433.625 |
| 2003.25 | 380 | 431.50 | 433.500 |
| 2003.50 | 421 | 431.50 | 431.500 |
| 2003.75 | 490 | 434.00 | 432.750 |
| 2004.00 | 435 | 431.75 | 432.875 |
| 2004.25 | 390 | 422.75 | 427.250 |
| 2004.50 | 412 | 418.00 | 420.375 |
| 2004.75 | 454 | 421.25 | 419.625 |
| 2005.00 | 416 | 420.25 | 420.750 |
| 2005.25 | 403 | 427.25 | 423.750 |
| 2005.50 | 408 | 432.75 | 430.000 |
| 2005.75 | 482 | 428.50 | 430.625 |
| 2006.00 | 438 | 427.75 | 428.125 |
| 2006.25 | 386 | 430.00 | 428.875 |
| 2006.50 | 405 | 427.25 | 428.625 |
| 2006.75 | 491 | 426.50 | 426.875 |
| 2007.00 | 427 | 423.75 | 425.125 |
| 2007.25 | 383 | 419.25 | 421.500 |
| 2007.50 | 394 | 417.50 | 418.375 |
| 2007.75 | 473 | 419.25 | 418.375 |
| 2008.00 | 420 | 423.25 | 421.250 |
| 2008.25 | 390 | 427.00 | 425.125 |
| 2008.50 | 410 | 425.75 | 426.375 |
| 2008.75 | 488 | 427.75 | 426.750 |
| 2009.00 | 415 | 430.00 | 428.875 |
| 2009.25 | 398 | 430.00 | 430.000 |
| 2009.50 | 419 | 429.75 | 429.875 |
| 2009.75 | 488 | 423.75 | 426.750 |
| 2010.00 | 414 | NA | NA |
| 2010.25 | 374 | NA | NA |
autoplot(elecequip, series = "Data", color = "grey50", alpha = 0.5) +
autolayer(ma(elecequip, centre = T, order = 12), series = "12-MA", color = "red") +
labs(y = "New orders index",
title = "Electrical equipment manufacturing (Euro area)")
## Warning: Removed 12 rows containing missing values (geom_path).
Additive decomposition and multiplcative decomposition
elecequip %>% decompose(type = "multiplicative") %>%
autoplot() +
labs(title = "Classical multiplicative decomposiition of electrical equipment index")
elecequip %>% decompose(type = "additive") %>%
autoplot() +
labs(title = "Classical additive decomposiition of electrical equipment index")
elecequip %>%
seas(x11 = "") -> fit
autoplot(fit) +
labs("X11 decomposition of electrical equipment index")
ATTENTION: compare x11 and other classical time series decomposition method, we can see the X11 trend-cycle has captured the sudden fall in the data in searly 2009.
autoplot(elecequip, series = "Data") +
autolayer(trendcycle(fit), series = "Trend") +
autolayer(seasadj(fit), series = "Seasonally Adjusted") +
labs(y = "new order index",
title = "Electrical equipment manufacturing (Euro Area)") +
scale_color_manual(values = c("grey", "blue", "red"),
breaks = c("Data", "Trend", "Seasonally Adjusted"))
fit %>% seasonal() %>%
ggsubseriesplot() +
labs(y = "Seasonal", x = "",
title = "Seasonal sub-series plot of New order Index")
## 6.5 SEATS decomposition
SEATS stand for “Seasonal Extraction in ARIMA Time Series.” This procedure was originally developed at the Bank of Spain, and is now widely used by government agencies around the world. The procedure works only with quarterly and monthly data. so other seasonality data, such as daily, hourly data, or weekly data, require an alternative approach.
elecequip %>%
seas() -> fit_seats
fit_seats %>%
autoplot() +
labs(title = "SEATS decomposition of electrical equipment index")
SYL acronymfor “Seasonal and Trend decomposition using Loess”
ATTENTION: Advantages over classical, X11, SEATS
elecequip %>%
stl(t.window = 13, s.window = "periodic", robust = T) %>%
autoplot() +
labs(title = "STL of new order index")
strength of trend as
\(F_T = max(0, 1 - \frac{Var(R_t)}{Var(T_t + R_t)}\)
strendth of seasonality as
\(F_S = max(0, 1 - \frac{Var(R_t)}{Var(S_t + R_t)}\)
These method ive a measurement of strength of trend and seasonality, if series with seasonal or trend strength \(F_S\) \(F_T\) is close to 1, we can say the data exhibut strong seasonality/trend.
ATTENTION: These method could be useful if you want to analyze a large collection of time series, and want to find
fit <- stl(elecequip, t.window = 13, s.window = "periodic", robust = T)
fit %>%
seasadj() %>%
naive() %>%
autoplot() +
labs(y = "New orders index", x = "",
title = "Naive forecasts of seasonally adjusted data")
ATTENTION: This is the naive forecasts of the seasonally adjusted electrical equipment orders data. We can “reseasonalised” by adding the seasonal naive forecasts of the seasonal component.
fit %>% forecast(method = "naive") %>%
autoplot() +
labs(y = "New orders index", title = "Forecasts from STL + Random Walk",
x = "")
use beer2 as example
beer2_data %>%
mutate(ma_5ma = ma(observation, order = 5, centre = F),
ma_3_5ma = ma(ma_5ma, order = 3, centre = T)) %>%
head(10) %>%
knitr::kable()
| date | observation | ma_5ma | ma_3_5ma |
|---|---|---|---|
| 1992.00 | 443 | NA | NA |
| 1992.25 | 410 | NA | NA |
| 1992.50 | 420 | 447.6 | NA |
| 1992.75 | 532 | 443.2 | 444.6667 |
| 1993.00 | 433 | 443.2 | 449.3333 |
| 1993.25 | 421 | 461.6 | 449.9333 |
| 1993.50 | 410 | 445.0 | 447.0667 |
| 1993.75 | 512 | 434.6 | 438.2000 |
| 1994.00 | 449 | 435.0 | 442.9333 |
| 1994.25 | 381 | 459.2 | 445.4000 |
autoplot(plastics)
we can see the seasonal fuctuations and upward trend.
plastics %>% decompose(type = "multiplicative") ->
fit_multi
fit_multi %>%
autoplot()
Yes, it does
seasadj(fit_multi)
## Jan Feb Mar Apr May Jun Jul
## 1 967.3468 981.2262 999.3182 986.4758 985.8925 956.7826 1001.1759
## 2 966.0431 985.4495 996.7427 1023.8257 1051.9377 1057.0417 1108.5982
## 3 1168.1168 1116.3736 1139.6864 1158.9443 1152.4413 1146.0648 1119.7701
## 4 1239.8204 1212.1029 1207.9388 1218.2647 1219.4437 1229.0378 1277.0364
## 5 1342.8129 1452.8342 1450.0416 1411.6051 1405.1361 1414.8628 1384.4587
## Aug Sep Oct Nov Dec
## 1 992.4139 981.0263 951.4241 978.9119 939.8819
## 2 1100.9592 1089.0366 1090.2260 1074.6860 1081.5244
## 3 1171.9625 1196.2349 1222.2981 1179.5334 1227.9683
## 4 1269.0820 1302.6210 1345.9580 1414.4319 1451.2352
## 5 1312.3369 1240.9008 1194.5377 1128.1178 1215.9647
autoplot(plastics, series = "Data", color = "grey50") +
autolayer(seasadj(fit_multi), series = "Seasonally Adjusted", color = "red")
plastics_outlier <- plastics
plastics_outlier[40] <- plastics_outlier[40] + 500
plastics_outlier %>% decompose(type = "multiplicative") ->
fit_out
autoplot(fit_out)
a <- autoplot(plastics_outlier, series = "Data", color = "grey50") +
autolayer(seasadj(fit_out), series = "Seasonally Adjusted", color = "red")
a
b <- autoplot(plastics, series = "Data", color = "grey50") +
autolayer(seasadj(fit_multi), series = "Seasonally Adjusted", color = "red")
grid.arrange(a,b, ncol = 1)
outlier have little effect on general trend, but have effect on seasonaly adjusted data, just like original data.
if the outlier is near the end, same amount of increase have less effect than in the middle of the time series. Because the trend is general upward, to have same amount of effect, the outlier shouold be larger in the end than middle.
retaildata <- readxl::read_excel("/Users/yifeiliu/Documents/R/data/book_exercise/forecasting/retail.xlsx", skip = 1)
myts <- ts(retaildata[,"A3349873A"],
frequency=12, start=c(1982,4))
autoplot(myts)
myts %>% seas(x11 = "") -> fit
autoplot(fit)
classical_fit <- myts %>% decompose(type = "multiplicative")
autoplot(classical_fit)
put the classical decomposition with X11 side by side, we can observer X11 decomposite result should large outlier around 2010-2014. And X11 decomposite result also show seasonal effect start decrease around the end of dataset.
stl_labour <- stl(labour, s.window = 13, robust = TRUE)
autoplot(stl_labour) +
labs(title = "STL decomposition of civilian labour force")
ggsubseriesplot(seasonal(stl_labour)) +
labs(y = "seasonal", title = "")
autoplot(labour, series = "Data", color = "grey50") +
autolayer(trendcycle(stl_labour), series = "Trend", color = "red") +
autolayer(seasadj(stl_labour), series = "Seasonally Adjusted", color = "blue") +
labs(x = "", y = "", title = "Number of persons in the civilian labour force in Australia each month")
overall, the numer of people in the civilian labour force increase over time. There were some recession during 1991-1992, seasonally adjusted data were able to capture this event. From subseriesplot, we can observe the seasonal fluctuation is relative small.
It’s visible in seasonal adjusted data.
autoplot(cangas)
ggsubseriesplot(cangas)
ggseasonplot(cangas, polar = T)
In the early stage, the production of gas higher in winer than summer, due to cold weather. But the production of gas goes up during summer as time goes on, probably due to increase use of AC in the summer.
fit_stl <- stl(cangas, s.window = 13, robust = T)
autoplot(fit_stl) +
labs(title = "Monthly Canadian gas production",
subtitle = "STL method", x= "")
we can observe the seasonal component increase at the begining of 70 and decrease around 90. And from trend component we can observe a general upward trend, but the trend is also flat during 70-87 period.
autoplot(cangas, series = "Data", color = "grey50") +
autolayer(trendcycle(fit_stl), series = "Trend", color = "red") +
autolayer(seasadj(fit_stl), series = "Seasonally Adjusted", color = "blue") +
labs(x = "", y = "", title = "Monthly Canadian gas production, STL decomposition")
fit_seats <- cangas %>% seas()
fit_x11 <- cangas %>% seas(x11 = "")
autoplot(fit_x11)
autoplot(fit_seats)
seas method use multiplicative decompisition method, so the mean of remainder for both SEATS and X11 are one. and mean of remainder for STL is zero. The trend is same among all three method, both increase and remain the same then increase again.
For seasonal component, we can observe that the seas method have a high seasonal compoent then decrease and increase. For STL method the seasoanl compoent is increase in the middle of data and decrease again.
# fixed seasonality
fit_stl_fix <- bricksq %>% stl(s.window = "period", robust = T)
fit_stl_float <- bricksq %>% stl(s.window = 5, robust = T)
a <- autoplot(fit_stl_fix) + labs(title = "fixed", x = "")
b <- autoplot(fit_stl_float) + labs(title = "floated", x= "")
grid.arrange(a,b, ncol = 2)
seasonal component is constant in fixed STL method and change over time in float method.
autoplot(bricksq, series = "Data", color = "grey50") +
autolayer(seasadj(fit_stl_fix), series = "Seasonally Adjusted", color = "blue") +
autolayer(seasadj(fit_stl_float), series = "Seasonally Adjusted", color = "red") +
labs(x = "", y = "")
By plotting the seasonally adjusted data on the same chart, we can see the floating method is more smooth than the fixed method.
for_naive <- naive(bricksq)
autoplot(for_naive)
fit_stl_fix %>% forecast(method = "naive") -> for_fix
fit_stl_float %>% forecast(method = "naive") -> for_float
autoplot(bricksq) +
autolayer(for_fix, series = "Fixed") +
autolayer(for_float, series = "Floated")
The range of prediction interval from two method are almost identical.
bricksq %>% stlf() -> for_stlf
autoplot(for_stlf) + labs(x = "", y = "", title = "stlf")
checkresiduals(for_stlf)
## Warning in checkresiduals(for_stlf): The fitted degrees of freedom is based
## on the model used for the seasonally adjusted data.
##
## Ljung-Box test
##
## data: Residuals from STL + ETS(M,N,N)
## Q* = 41.128, df = 6, p-value = 2.733e-07
##
## Model df: 2. Total lags used: 8
Box.test(for_stlf$residuals, lag = 10, type = "Ljung-Box")
##
## Box-Ljung test
##
## data: for_stlf$residuals
## X-squared = 42.86, df = 10, p-value = 5.267e-06
x <- Box.test(for_stlf$residuals, lag = 10, type = "Ljung-Box")
The residuals p-value is 5.267418510^{-6}
Comments on classical decompisition
ATTENTION:
the estimate of the trend-cycle is unavailable for the first and last few observations, and no estimate of the remainder component for the same time periods.
The trend-cycle estiamte tends to over-smooth rapid rises and falls in the data
Classical decomposition methods assume that the seasonal component repeat from year to year. in come place it may not hold true, like electricity demand pattern have changed over time as AC have become more widespread.
Occasionally, the values of the time series in a small number of periods may be particularly unusual. The classical method is not robust ot these kinds of unusual values.